
# Setup ------------------------------------------------------------------------
  # Options
  options(stringsAsFactors = FALSE)
  # Packages
  library(magrittr)
  library(data.table)
  library(stringr)
  library(lubridate)
  library(foreach)
  library(doParallel)
  # Directories of joined bill-weather files
  dir_project <- "S:/Edward/Elasticity/"
  dir_csv     <- paste0(dir_project, "DataCsv/")
  dir_rds     <- paste0(dir_project, "DataR/")
  dir_pge     <- paste0(dir_rds, "/Prices/PGE/")
  dir_socal   <- paste0(dir_rds, "/Prices/SoCal/")

# Find files -------------------------------------------------------------------
  # Find the file names for each utility
  pge_files <- dir_pge %>% dir()
  socal_files <- dir_socal %>% dir()
  # Find the zip codes for each utility
  pge_zips <- gsub("[^0-9]", "", pge_files)
  socal_zips <- gsub("[^0-9]", "", socal_files)

# PGE data work ----------------------------------------------------------------
  # Start the cluster
  cl <- makeCluster(4)
  registerDoParallel(cl)
  # Loop over the zip codes
  foreach(i = pge_zips,
    .packages = c("data.table", "magrittr", "lubridate", "stringr")) %dopar% {
    # Garbage control
    gc()
    # Load the zip codes' file
    zip_dt <- readRDS(paste0(dir_pge, "prices_pge_", i, ".rds"))
    # Keep bills with 28-34 days (matches selection in regression work)
    zip_dt <- zip_dt[between(days, 28, 34)]
    # If ppps_charge is missing, make it zero (started halfway through data)
    zip_dt[is.na(ppps_charge), ppps_charge := 0]
    # Calculate total bill
    zip_dt[, total_bill := thm_base * price_base + thm_excess * price_excess +
      thm * ppps_charge]
    # Summarize key variables by month
    zip_summary <- zip_dt[, list(
      allow_day_mean    = mean(allowance / days),
      price_base_mean   = mean(price_base, na.rm = T),
      price_excess_mean = mean(price_excess, na.rm = T),
      thm_mean          = mean(thm, na.rm = T),
      thm_sum           = sum(thm, na.rm = T),
      thm_base_mean     = mean(thm_base, na.rm = T),
      thm_base_sum      = sum(thm_base, na.rm = T),
      thm_excess_mean   = mean(thm_excess, na.rm = T),
      thm_excess_sum    = sum(thm_excess, na.rm = T),
      bill_hdd_mean     = mean(bill_hdd, na.rm = T),
      bill_hdd_sum      = sum(bill_hdd, na.rm = T),
      bill_mean         = mean(total_bill, na.rm = T),
      bill_sum          = sum(total_bill, na.rm = T),
      care_pct          = mean(care, na.rm = T),
      hh_count          = .N
      ), by = ym]
    # Order by month of sample
    setorder(zip_summary, ym)
    # Summarize key variables by month and CARE status
    care_summary <- zip_dt[, list(
      allow_day_mean    = mean(allowance / days),
      price_base_mean   = mean(price_base, na.rm = T),
      price_excess_mean = mean(price_excess, na.rm = T),
      thm_mean          = mean(thm, na.rm = T),
      thm_sum           = sum(thm, na.rm = T),
      thm_base_mean     = mean(thm_base, na.rm = T),
      thm_base_sum      = sum(thm_base, na.rm = T),
      thm_excess_mean   = mean(thm_excess, na.rm = T),
      thm_excess_sum    = sum(thm_excess, na.rm = T),
      bill_hdd_mean     = mean(bill_hdd, na.rm = T),
      bill_hdd_sum      = sum(bill_hdd, na.rm = T),
      bill_mean         = mean(total_bill, na.rm = T),
      bill_sum          = sum(total_bill, na.rm = T),
      care_pct          = mean(care, na.rm = T),
      hh_count          = .N
      ), by = list(ym, care)]
    # Order by month of sample
    setorder(care_summary, ym)
    # Save the two data.tables
    saveRDS(object = zip_summary,
      file = paste0(dir_rds, "Summaries/Zip5PGE/summaryPGE", i, ".rds"))
    saveRDS(object = care_summary,
      file = paste0(dir_rds, "Summaries/Zip5CarePGE/summaryPGE", i, ".rds"))
    }
  # Kill the cluster
  stopCluster(cl)
  # Clean up again
  gc()

# SoCal data work --------------------------------------------------------------
  # Start the cluster
  cl <- makeCluster(4)
  registerDoParallel(cl)
  # Loop over the zip codes
  foreach(i = socal_zips,
    .packages = c("data.table", "magrittr", "lubridate", "stringr")) %dopar% {
    # Garbage control
    gc()
    # Load the zip codes' file
    zip_dt <- readRDS(paste0(dir_socal, "prices_socal_", i, ".rds"))
    # Keep bills with 28-34 days (matches selection in regression work)
    zip_dt <- zip_dt[between(days, 28, 34)]
    # If ppps_charge is missing, make it zero (started halfway through data)
    zip_dt[is.na(ppps_charge), ppps_charge := 0]
    # Calculate total bill
    zip_dt[, total_bill := thm_base * price_base + thm_excess * price_excess +
      thm * ppps_charge]
    # Summarize key variables by month
    zip_summary <- zip_dt[, list(
      allow_day_mean    = mean(allowance / days),
      price_base_mean   = mean(price_base, na.rm = T),
      price_excess_mean = mean(price_excess, na.rm = T),
      thm_mean          = mean(thm, na.rm = T),
      thm_sum           = sum(thm, na.rm = T),
      thm_base_mean     = mean(thm_base, na.rm = T),
      thm_base_sum      = sum(thm_base, na.rm = T),
      thm_excess_mean   = mean(thm_excess, na.rm = T),
      thm_excess_sum    = sum(thm_excess, na.rm = T),
      bill_hdd_mean     = mean(bill_hdd, na.rm = T),
      bill_hdd_sum      = sum(bill_hdd, na.rm = T),
      bill_mean         = mean(total_bill, na.rm = T),
      bill_sum          = sum(total_bill, na.rm = T),
      care_pct          = mean(care, na.rm = T),
      hh_count          = .N
      ), by = ym]
    # Order by month of sample
    setorder(zip_summary, ym)
    # Summarize key variables by month and CARE status
    care_summary <- zip_dt[, list(
      allow_day_mean    = mean(allowance / days),
      price_base_mean   = mean(price_base, na.rm = T),
      price_excess_mean = mean(price_excess, na.rm = T),
      thm_mean          = mean(thm, na.rm = T),
      thm_sum           = sum(thm, na.rm = T),
      thm_base_mean     = mean(thm_base, na.rm = T),
      thm_base_sum      = sum(thm_base, na.rm = T),
      thm_excess_mean   = mean(thm_excess, na.rm = T),
      thm_excess_sum    = sum(thm_excess, na.rm = T),
      bill_hdd_mean     = mean(bill_hdd, na.rm = T),
      bill_hdd_sum      = sum(bill_hdd, na.rm = T),
      bill_mean         = mean(total_bill, na.rm = T),
      bill_sum          = sum(total_bill, na.rm = T),
      care_pct          = mean(care, na.rm = T),
      hh_count          = .N
      ), by = list(ym, care)]
    # Order by month of sample
    setorder(care_summary, ym)
    # Save the two data.tables
    saveRDS(object = zip_summary,
      file = paste0(dir_rds, "Summaries/Zip5SoCal/summarySoCal", i, ".rds"))
    saveRDS(object = care_summary,
      file = paste0(dir_rds, "Summaries/Zip5CareSoCal/summarySoCal", i, ".rds"))
    }
  # Kill the cluster
  stopCluster(cl)
  # Clean up again
  gc()

# Combine PG&E's summary files -------------------------------------------------
  # Combine the PG&E non-CARE files
  pge_dt <- lapply(X = pge_zips, FUN = function(i) {
    # Load the file
    tmp_dt <- readRDS(paste0(dir_rds,
      "Summaries/Zip5PGE/summaryPGE", i, ".rds"))
    # Add the zipcode
    tmp_dt[, zip5 := i]
  }) %>% rbindlist()
  # Add utility
  pge_dt[, utility := "pge"]
  # Save
  saveRDS(object = pge_dt,
    file = paste0(dir_rds, "Summaries/zip5PGE.rds"))
  # Combine the PG&E CARE files
  pge_dt <- lapply(X = pge_zips, FUN = function(i) {
    # Load the file
    tmp_dt <- readRDS(paste0(dir_rds,
      "Summaries/Zip5CarePGE/summaryPGE", i, ".rds"))
    # Add the zipcode
    tmp_dt[, zip5 := i]
  }) %>% rbindlist()
  # Add utility
  pge_dt[, utility := "pge"]
  # Save
  saveRDS(object = pge_dt,
    file = paste0(dir_rds, "Summaries/zip5CarePGE.rds"))

# Combine SoCal's summary files ------------------------------------------------
  # Combine the PG&E non-CARE files
  socal_dt <- lapply(X = socal_zips, FUN = function(i) {
    # Load the data
    tmp_dt <- readRDS(paste0(dir_rds,
      "Summaries/Zip5SoCal/summarySoCal", i, ".rds"))
    # Add the utility
    tmp_dt[, zip5 := i]
  }) %>% rbindlist()
  # Add utility
  socal_dt[, utility := "socal"]
  # Save
  saveRDS(object = socal_dt,
    file = paste0(dir_rds, "Summaries/zip5SoCal.rds"))
  # Combine the PG&E CARE files
  socal_dt <- lapply(X = socal_zips, FUN = function(i) {
    # Load the file
    tmp_dt <- readRDS(paste0(dir_rds,
      "Summaries/Zip5CareSoCal/summarySoCal", i, ".rds"))
    # Add the zipcode
    tmp_dt[, zip5 := i]
  }) %>% rbindlist()
  # Add utility
  socal_dt[, utility := "socal"]
  # Save
  saveRDS(object = pge_dt,
    file = paste0(dir_rds, "Summaries/zip5CareSoCal.rds"))
